#> Loading required package: pacman

CONFIG


countries_of_interest <- 
  list(
    "S. Korea", #"Korea, South",
    "Spain", 
    "Italy" ,
    "Norway",
    "Israel", 
    "France",
    "Switzerland", 
    "Sweden",
    "Germany",
    "Netherlands", 
    "UK"
  )
df <- 
  read_csv("./data/daily_records_worldmeters.csv") %>% 
  select(
    country = Country,
    date = Date,
    confirmed = `Total Cases`,
    critical = Critical,
    recovered = `Total Recovered`,
    death = `Total Deaths`
  ) %>% 
  mutate(
    date = as.Date(date,"%d-%m-%Y"),
  ) %>% 
  group_by(country) %>% 
  arrange(date) %>% 
  mutate(
    active = confirmed - recovered - death,
    new_cases = pmax(confirmed - lag(confirmed),0),
    r_lag_critical = lag(critical),
    r_critical = critical/lag(critical),
    r_critical = ifelse(is.infinite(r_critical) | is.nan(r_critical) | is.na(r_critical), 1, r_critical),
    r_active = active/lag(active),
    r_active = ifelse(is.infinite(r_active) | is.nan(r_active) | is.na(r_active), 1, r_active),
    nday = difftime(date, min(date), units = "days") %>% as.integer() 
) %>% 
  ungroup() %>% 
   arrange(country,date) 
#> Parsed with column specification:
#> cols(
#>   Date = col_character(),
#>   Country = col_character(),
#>   `Total Cases` = col_double(),
#>   `Total Deaths` = col_double(),
#>   `Total Recovered` = col_double(),
#>   Critical = col_double()
#> )


tf <- 
  df %>% 
  filter(country %in% countries_of_interest) %>% 
  as_tsibble(key = country, index = date)

# df %>% filter(country == "France") %>% view

data_for_plot <-
  tf %>% 
  group_by(country) %>% 
  filter(date >= min(date[active>=1])) %>%  
  mutate(nday = difftime(date, min(date), units = "days") %>% as.integer()) %>% 
  ungroup() 

active & critical vs time (log scale)

  data_for_plot %>% 
  ggplot(aes(x = nday, color = country)) +
  geom_line(aes(y = log(critical))) +
  geom_line(aes(y = log(active))) +
  ylab("active & critical (log scale")+
  ggtitle("log(active) & log(critical) vs. time")+
  facet_wrap(~country, scales = "free_x")

optimal lag between active and critical



# active slove vs. critical slope: active-lag(active,1) vs. critical-lag(critical
local({
ccf_active_critival_new_cases <-
  data_for_plot %>%
  update_tsibble(index = nday) %>%
CCF(active-lag(active,1), critical-lag(critical,1))

ccf_active_critival_new_cases %>% autoplot()

ccf_active_critival_new_cases %>%
  filter(abs(lag)<=14) %>%
  group_by(country) %>%
  filter(ccf == max(ccf) & abs(max(ccf))>=0.1) %>%
  arrange(desc(ccf)) %>%
  mutate(ccf = round(ccf,2))
})
#> # A tsibble: 11 x 3 [1]
#> # Key:       country [11]
#> # Groups:    country [11]
#>    country       lag   ccf
#>    <chr>       <lag> <dbl>
#>  1 Italy           3  0.88
#>  2 France         -1  0.8 
#>  3 Sweden          1  0.71
#>  4 Israel          0  0.69
#>  5 Spain           0  0.59
#>  6 Netherlands    -1  0.56
#>  7 Germany        -1  0.55
#>  8 Norway         -2  0.53
#>  9 UK              2  0.47
#> 10 Switzerland    -2  0.38
#> 11 S. Korea       -4  0.22


# active r vs. critical r
local({
  ccf_active_critival <- 
    data_for_plot %>% 
    update_tsibble(index = nday) %>% 
    CCF(r_active, r_critical)
  
  ccf_active_critival %>% 
    autoplot()
  
  ccf_active_critival %>% 
    filter(abs(lag)<=14) %>% 
    group_by(country) %>% 
    arrange(desc(ccf)) %>% 
    mutate(ccf_round = round(ccf,2)) %>% 
    arrange(lag) %>% 
    filter(lag<0, ccf>0.1) %>% 
    pivot_wider(country,names_from=lag,
                values_from = ccf_round)
  # 
  # z %>% 
  #   filter(lag<0) %>%
  #   group_by(country) %>% 
  #   mutate(ccf_normalized = ccf/max(abs(ccf))) %>% 
  #   ungroup() %>% 
  #   ggplot(aes(x=lag, y=country, fill = ccf/ccf_normalized))+
  #   geom_tile()+
  #   scale_fill_gradient(low="white", high="blue") +
  # theme_ipsum()
    
})
#> Warning: Current temporal ordering may yield unexpected results.
#> Suggest to sort by `country`, `lag` first.

#> Warning: Current temporal ordering may yield unexpected results.
#> Suggest to sort by `country`, `lag` first.
#> # A tibble: 8 x 14
#> # Groups:   country [8]
#>   country  `-13` `-12` `-11` `-10`  `-9`  `-8`  `-7`  `-6`  `-5`  `-4`
#>   <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Spain    0.580  0.33 NA     0.22 NA     0.11  0.13  0.12 NA     0.15
#> 2 Sweden   0.16   0.17 NA    NA    NA    NA    NA    NA    NA    NA   
#> 3 S. Kor…  0.11   0.24  0.38  0.27  0.24 NA     0.22  0.32  0.3   0.3 
#> 4 Israel  NA      0.27 NA     0.22 NA     0.23 NA     0.16 NA    NA   
#> 5 France  NA      0.17 NA    NA    NA    NA    NA    NA    NA    NA   
#> 6 Nether… NA      0.13 NA    NA     0.27 NA    NA    NA    NA    NA   
#> 7 Italy   NA     NA    NA    NA    NA     0.13  0.2  NA     0.18 NA   
#> 8 UK      NA     NA    NA    NA    NA    NA     0.2  NA    NA    NA   
#> # … with 3 more variables: `-3` <dbl>, `-2` <dbl>, `-1` <dbl>




#  cdf by  optimal lag - note the lag =  -12  zon
local({
    data_for_plot %>% 
    update_tsibble(index = nday) %>% 
    CCF(r_active, r_critical) %>% 
    arrange(country, lag) %>% 
    mutate(ccf = round(ccf,2)) %>% 
    group_by(country) %>% 
    slice(max(0,which.max(ccf)-3): max(0,which.max(ccf)+3))
})
#> # A tsibble: 66 x 3 [1]
#> # Key:       country [11]
#> # Groups:    country [11]
#>    country   lag   ccf
#>    <chr>   <lag> <dbl>
#>  1 France     -5 -0.21
#>  2 France     -4 -0.03
#>  3 France     -3  0.27
#>  4 France     -2  0.74
#>  5 France     -1  0.1 
#>  6 France      0  0.15
#>  7 France      1  0.02
#>  8 Germany   -11 -0.02
#>  9 Germany   -10  0   
#> 10 Germany    -9 -0.02
#> # … with 56 more rows



# max lag  = -2 ==> active precedes critical by 2.
# tibble(
#   day = 1:9,
#   active = c( 0,  1, 2,3,-4,-5,6,NA,NA),
#   critcal = c(NA, NA,0,1, 2, 3,-4,-5,6)
# ) %>%
#   as_tsibble(index = day) %>%
#   CCF(active,critcal) %>% 
#   arrange(desc(ccf))

r_active & r_critical vs time

  data_for_plot %>% 
  ggplot(aes(x = nday)) +
  geom_line(aes(y = r_critical, color = "critical")) +
  geom_line(aes(y = r_active, color = "active")) +
  # ylab("active & critical (log scale")+
  ggtitle("active rate & critical rate vs. time")+
  ylim(0,2)+
  # guides(color=guide_legend(title="patient type - red - critical, green - active")) +
  facet_wrap(~country, scales = "free")

% active vs. time


OPT_ACTIVE_CRITICAL_LAG <- 7

tf_enriched <- 
  tf %>% 
  update_tsibble(index = nday) %>% 
  group_by(country) %>% 
  filter(date >= min(date[active>=1])) %>%  
  arrange(date) %>% 
  mutate(r_active_lag = lag(active, OPT_ACTIVE_CRITICAL_LAG),
         lag_active = lag(active, OPT_ACTIVE_CRITICAL_LAG),
         prc_critical_active_lagged = 100*critical/lag_active,
         prc_critical_active_lagged = ifelse(is.infinite(prc_critical_active_lagged) | is.nan(prc_critical_active_lagged) | is.na(prc_critical_active_lagged), 0, prc_critical_active_lagged) %>% pmax(0) %>% pmin(100)
         ) %>% 
  ungroup()

(tf_enriched %>% 
  ggplot(aes(x=nday, y=round(prc_critical_active_lagged,2), color = country)) + 
  geom_line() +
    ggtitle("Convert rate - % critcal out of active (7 day lag)")+
  # theme(legend.position='none')+
  facet_wrap(~country, scales = "free_x")) %>% 
  ggplotly